Welcome to the fourth section of our comprehensive training program! This section focuses on handling time series (temporal) data in R. Time series analysis is crucial for understanding patterns, trends, and seasonality in data collected over time. This manual will guide you through practical techniques to handle temporal data efficiently.
By the end of this training, you will be able to:
Time series data is a sequence of observations recorded at regular or irregular time intervals. Common examples include:
# Build a synthetic series so each classical component can be shown on its own.
set.seed(123)                      # fixed seed -> reproducible noise
n <- 100
time_points <- seq_len(n)

# The three classical building blocks of a time series
trend_component <- 0.05 * time_points                     # slow linear drift
seasonal_component <- 5 * sin(2 * pi * time_points / 12)  # 12-step cycle
random_component <- rnorm(n, mean = 0, sd = 2)            # white noise

# Additive model: observed = trend + seasonal + random
sample_ts <- Reduce(`+`, list(trend_component, seasonal_component, random_component))

# Long-format frame: one panel per component for faceted plotting
ts_data <- data.frame(
  Time = rep(time_points, times = 4),
  Value = c(trend_component, seasonal_component, random_component, sample_ts),
  Component = rep(c("Trend", "Seasonality", "Random", "Combined"), each = n)
)
# Plot components: facet the long-format ts_data so each component gets its
# own panel (scales = "free_y" because the components differ in magnitude).
p_components <- ggplot(ts_data, aes(x = Time, y = Value, color = Component)) +
geom_line(linewidth = 0.8) +
facet_wrap(~Component, scales = "free_y", ncol = 2) +
labs(title = "Time Series Components",
subtitle = "Decomposition of a time series into its basic components",
x = "Time",
y = "Value") +
theme_minimal() +
# Legend is redundant: each facet is already labelled by its component
theme(legend.position = "none") +
scale_color_brewer(palette = "Set1")
print(p_components)
## 1. Temporal Data Structures in R
1. Date class – for calendar dates
## [1] "2024-01-01" "2024-02-15" "2024-12-31"
## [1] "Date"
2. POSIXct class – for date-times with timezone
# POSIXct class: stores an exact moment in time (date + time of day).
# NOTE(review): no tz argument is given, so parsing assumes the system's
# local timezone -- confirm that is intended for these examples.
date_times <- as.POSIXct(c("2024-01-01 09:30:00", "2024-01-01 14:45:00"))
format(date_times, "%Y-%m-%d %H:%M:%S")## [1] "2024-01-01 09:30:00" "2024-01-01 14:45:00"
## [1] "POSIXct" "POSIXt"
Using lubridate for date manipulation
# Parse the same calendar date written in four different formats.
date_strings <- c("2024-01-15", "15/01/2024", "January 15, 2024", "15 Jan 2024")

# Pair each string with the lubridate parser matching its layout:
# ymd = year-month-day, dmy = day-month-year, mdy = month-day-year.
format_parsers <- list(ymd, dmy, mdy, dmy)
parsed <- tibble(
  Input = date_strings,
  Parsed = vapply(
    seq_along(date_strings),
    function(i) as.character(format_parsers[[i]](date_strings[i])),
    character(1)
  )
)
parsed## # A tibble: 4 × 2
## Input Parsed
## <chr> <chr>
## 1 2024-01-15 2024-01-15
## 2 15/01/2024 2024-01-15
## 3 January 15, 2024 2024-01-15
## 4 15 Jan 2024 2024-01-15
# Pull the standard calendar components out of a few example dates.
demo_dates <- ymd(c("2024-01-15", "2024-06-30", "2024-12-25"))
components <- tibble(
  Date = demo_dates,
  Year = year(demo_dates),
  Month = month(demo_dates, label = TRUE),    # ordered factor, e.g. Jan
  Day = day(demo_dates),
  Weekday = wday(demo_dates, label = TRUE),   # ordered factor, e.g. Mon
  Quarter = quarter(demo_dates)               # 1-4
)
components## # A tibble: 3 × 6
## Date Year Month Day Weekday Quarter
## <date> <dbl> <ord> <int> <ord> <int>
## 1 2024-01-15 2024 Jan 15 Mon 1
## 2 2024-06-30 2024 Jun 30 Sun 2
## 3 2024-12-25 2024 Dec 25 Wed 4
# Date arithmetic: adding periods and locating month boundaries.
anchor_date <- ymd("2024-01-01")
results <- tibble(
  Description = c("Start date", "Add 30 days", "Add 2 months", "End of month"),
  Value = vapply(
    list(
      anchor_date,
      anchor_date + days(30),
      anchor_date + months(2),
      # Last day of the month = first day of the next month minus one day
      ceiling_date(anchor_date, "month") - days(1)
    ),
    as.character,
    character(1)
  )
)
results## # A tibble: 4 × 2
## Description Value
## <chr> <chr>
## 1 Start date 2024-01-01
## 2 Add 30 days 2024-01-31
## 3 Add 2 months 2024-03-01
## 4 End of month 2024-01-31
Below are examples of creating regular time sequences (daily, weekly, monthly) starting at 2024-01-01.
# Regular date sequences, all anchored at 2024-01-01.
start_date <- ymd("2024-01-01")
# Daily sequence for one month
daily_seq <- seq(start_date, by = "day", length.out = 30)
# Show first 10
daily_seq[1:10]
## [1] "2024-01-01" "2024-01-02" "2024-01-03" "2024-01-04" "2024-01-05"
## [6] "2024-01-06" "2024-01-07" "2024-01-08" "2024-01-09" "2024-01-10"
# Weekly sequence (12 weeks). NOTE: this generating line was missing from
# the source even though its output (below) was shown -- reconstructed here.
weekly_seq <- seq(start_date, by = "week", length.out = 12)
weekly_seq
## [1] "2024-01-01" "2024-01-08" "2024-01-15" "2024-01-22" "2024-01-29"
## [6] "2024-02-05" "2024-02-12" "2024-02-19" "2024-02-26" "2024-03-04"
## [11] "2024-03-11" "2024-03-18"
# Monthly sequence (12 months)
monthly_seq <- seq(start_date, by = "month", length.out = 12)
monthly_seq
## [1] "2024-01-01" "2024-02-01" "2024-03-01" "2024-04-01" "2024-05-01"
## [6] "2024-06-01" "2024-07-01" "2024-08-01" "2024-09-01" "2024-10-01"
## [11] "2024-11-01" "2024-12-01"
We create one year of daily data with a linear trend, annual and weekly seasonality, and random noise. We also generate a temperature series.
# Simulate one year of daily business data with known components.
set.seed(123)
n_days <- 365 # one year of daily rows; 2024 is a leap year, so the range ends 2024-12-30 (see output below)
sample_ts <- tibble(
date = seq(ymd("2024-01-01"), by = "day", length.out = n_days),
# Components
trend = 0.1 * (1:n_days), # Linear trend
seasonal = 20 * sin(2 * pi * (1:n_days)/365), # Yearly seasonality
weekly_pattern = 5 * sin(2 * pi * (1:n_days)/7), # Weekly pattern
noise = rnorm(n_days, 0, 3), # Random noise
# Combined series
sales = 100 + trend + seasonal + weekly_pattern + noise,
temperature = 15 + 10 * sin(2 * pi * (1:n_days)/365) + rnorm(n_days, 0, 2)
) %>%
# Derived calendar fields used by the grouping examples further below
mutate(
month = month(date, label = TRUE),
weekday = wday(date, label = TRUE),
is_weekend = weekday %in% c("Sat", "Sun")
)
# Summary information
list(
date_range = paste(min(sample_ts$date), "to", max(sample_ts$date)),
observations = nrow(sample_ts),
variables = c("sales", "temperature")
)## $date_range
## [1] "2024-01-01 to 2024-12-30"
##
## $observations
## [1] 365
##
## $variables
## [1] "sales" "temperature"
## # A tibble: 10 × 10
## date trend seasonal weekly_pattern noise sales temperature month
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <ord>
## 1 2024-01-01 0.1 0.344 3.91e+ 0 -1.68 103. 14.8 Jan
## 2 2024-01-02 0.2 0.688 4.87e+ 0 -0.691 105. 16.1 Jan
## 3 2024-01-03 0.3 1.03 2.17e+ 0 4.68 108. 13.6 Jan
## 4 2024-01-04 0.4 1.38 -2.17e+ 0 0.212 99.8 17.4 Jan
## 5 2024-01-05 0.5 1.72 -4.87e+ 0 0.388 97.7 14.9 Jan
## 6 2024-01-06 0.6 2.06 -3.91e+ 0 5.15 104. 20.9 Jan
## 7 2024-01-07 0.7 2.40 -1.22e-15 1.38 104. 12.9 Jan
## 8 2024-01-08 0.8 2.75 3.91e+ 0 -3.80 104. 15.4 Jan
## 9 2024-01-09 0.9 3.09 4.87e+ 0 -2.06 107. 18.2 Jan
## 10 2024-01-10 1 3.43 2.17e+ 0 -1.34 105. 17.7 Jan
## # ℹ 2 more variables: weekday <ord>, is_weekend <lgl>
We create four visualizations: (1) basic time series, (2) series with trend component, (3) sales by month, and (4) sales by weekday.
# Basic time series plot: raw daily sales
p1 <- ggplot(sample_ts, aes(x = date, y = sales)) +
geom_line(color = "blue", linewidth = 0.8) +
labs(title = "Sales Over Time", x = "Date", y = "Sales") +
theme_minimal()
# Plot with components: the trend is shifted by the series' base level (100)
# so it overlays the sales line on the same scale
p2 <- ggplot(sample_ts, aes(x = date)) +
geom_line(aes(y = sales), color = "blue", alpha = 0.5, linewidth = 0.5) +
geom_line(aes(y = trend + 100), color = "red", linewidth = 1) +
labs(title = "Sales with Trend Component", x = "Date", y = "Value") +
theme_minimal()
# Monthly patterns: one boxplot per month (month is an ordered factor)
p3 <- ggplot(sample_ts, aes(x = month, y = sales, group = month)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Sales by Month", x = "Month", y = "Sales") +
theme_minimal()
# Daily patterns: one boxplot per weekday
p4 <- ggplot(sample_ts, aes(x = weekday, y = sales, group = weekday)) +
geom_boxplot(fill = "lightgreen") +
labs(title = "Sales by Weekday", x = "Weekday", y = "Sales") +
theme_minimal()
grid.arrange(p1, p2, p3, p4, ncol = 2)

1) Daily to weekly aggregation
# Daily -> weekly aggregation.
# floor_date(..., "week") buckets each date by its week start (Sunday by
# default in lubridate), so the first bucket (2023-12-31) holds only the
# 6 observations from 2024-01-01 onward -- see obs_count in the output.
weekly_agg <- sample_ts %>%
mutate(week_start = floor_date(date, "week")) %>%
group_by(week_start) %>%
summarise(
avg_sales = mean(sales),
total_sales = sum(sales),
obs_count = n(),
.groups = 'drop'
)
head(weekly_agg, 5)## # A tibble: 5 × 4
## week_start avg_sales total_sales obs_count
## <date> <dbl> <dbl> <int>
## 1 2023-12-31 103. 617. 6
## 2 2024-01-07 104. 731. 7
## 3 2024-01-14 108. 753. 7
## 4 2024-01-21 108. 759. 7
## 5 2024-01-28 114. 799. 7
2) Daily to monthly aggregation
# Daily -> monthly aggregation: one summary row per calendar month
monthly_agg <- sample_ts %>%
mutate(month_start = floor_date(date, "month")) %>%
group_by(month_start) %>%
summarise(
avg_sales = mean(sales),
total_sales = sum(sales),
avg_temp = mean(temperature),
.groups = 'drop'
)
monthly_agg## # A tibble: 12 × 4
## month_start avg_sales total_sales avg_temp
## <date> <dbl> <dbl> <dbl>
## 1 2024-01-01 107. 3324. 17.5
## 2 2024-02-01 119. 3454. 21.8
## 3 2024-03-01 127. 3924. 24.5
## 4 2024-04-01 130. 3890. 25.2
## 5 2024-05-01 127. 3940. 21.8
## 6 2024-06-01 122. 3660. 17.8
## 7 2024-07-01 115. 3569. 12.6
## 8 2024-08-01 108. 3344. 7.34
## 9 2024-09-01 107. 3217. 4.96
## 10 2024-10-01 111. 3430. 5.75
## 11 2024-11-01 119. 3562. 7.49
## 12 2024-12-01 130. 3904. 12.5
3) Aggregation by weekday
# Aggregation by weekday: one row per weekday with mean, min, and max sales.
# sample_ts is a plain tibble here, so group_by(weekday) works directly.
by_day <- group_by(sample_ts, weekday)
weekday_agg <- summarise(
  by_day,
  avg_sales = mean(sales),
  min_sales = min(sales),
  max_sales = max(sales),
  .groups = 'drop'
)
weekday_agg## # A tibble: 7 × 4
## weekday avg_sales min_sales max_sales
## <ord> <dbl> <dbl> <dbl>
## 1 Sun 119. 104. 139.
## 2 Mon 122. 103. 143.
## 3 Tue 122. 105. 137.
## 4 Wed 120. 105. 144.
## 5 Thu 116. 99.8 132.
## 6 Fri 114. 97.7 132.
## 7 Sat 116. 99.6 133.
# Overlay: raw daily sales (gray) with the weekly average (red) on top
p_agg1 <- ggplot() +
geom_line(data = sample_ts, aes(x = date, y = sales),
color = "gray", alpha = 0.5, linewidth = 0.5) +
geom_line(data = weekly_agg, aes(x = week_start, y = avg_sales),
color = "red", linewidth = 1) +
labs(title = "Daily Data with Weekly Averages", x = "Date", y = "Sales") +
theme_minimal()
# Monthly totals as a bar chart
p_agg2 <- ggplot(monthly_agg, aes(x = month_start, y = total_sales)) +
geom_col(fill = "steelblue") +
labs(title = "Monthly Total Sales", x = "Month", y = "Total Sales") +
theme_minimal()
grid.arrange(p_agg1, p_agg2, ncol = 2)

Time series decomposition separates data into components:
Y(t) = Trend(t) + Seasonal(t) + Random(t)
# Simulate 10 years of monthly observations as an additive model.
set.seed(123)
n <- 120 # 10 years of monthly data
time <- seq_len(n)

# Individual components
trend_comp <- 0.5 * time                       # steady upward drift
seasonal_comp <- 10 * sin(2 * pi * time / 12)  # 12-month cycle
random_comp <- rnorm(n, mean = 0, sd = 3)      # irregular noise

# Observed series = sum of the three components
ts_data <- trend_comp + seasonal_comp + random_comp
# Visualize components separately.
# Stack the four series into long format: one row per (time, component).
components_df <- tibble(
time = rep(time, 4),
value = c(ts_data, trend_comp, seasonal_comp, random_comp),
component = rep(c("Combined", "Trend", "Seasonal", "Random"), each = n)
)
# One facet per component; free y-scales since magnitudes differ
p_components <- ggplot(components_df, aes(x = time, y = value)) +
geom_line(linewidth = 0.8, color = "blue") +
facet_wrap(~component, scales = "free_y", ncol = 2) +
labs(title = "Time Series Components", subtitle = "Combined = Trend + Seasonal + Random",
x = "Time", y = "Value") +
theme_minimal()
p_components# Convert to time series object
# frequency = 12 declares monthly data so decompose() can estimate the
# seasonal component over 12-observation cycles.
ts_object <- ts(ts_data, frequency = 12)
# Classical decomposition with an additive model -- matching how the
# series was constructed above (components were summed, not multiplied).
decomp_result <- decompose(ts_object, type = "additive")
# Plot decomposition
# NOTE(review): plot() on a decomposed.ts object arranges its own panels,
# so this par(mfrow) call is likely overridden -- confirm it is needed.
par(mfrow = c(4, 1), mar = c(3, 4, 2, 2))
plot(decomp_result)# STL is more robust for many time series
# STL = Seasonal-Trend decomposition using Loess. s.window = "periodic"
# forces an identical seasonal pattern across all years.
stl_result <- stl(ts_object, s.window = "periodic")
# Plot STL decomposition
par(mfrow = c(4, 1), mar = c(3, 4, 2, 2))
plot(stl_result, main = "STL Decomposition")# Extract components
# stl() stores its output in the time.series matrix with one column per
# component; convert each column to a plain numeric vector.
components_stl <- tibble(
time = time,
observed = as.numeric(ts_object),
trend = as.numeric(stl_result$time.series[, "trend"]),
seasonal = as.numeric(stl_result$time.series[, "seasonal"]),
remainder = as.numeric(stl_result$time.series[, "remainder"])
)
# Component statistics: seasonal mean ~ 0 and a small remainder SD indicate
# a clean decomposition (see output below).
components_stl %>%
summarise(
Trend_Mean = mean(trend), Trend_SD = sd(trend),
Seasonal_Mean = mean(seasonal), Seasonal_SD = sd(seasonal),
Remainder_Mean = mean(remainder), Remainder_SD = sd(remainder)
)## # A tibble: 1 × 6
## Trend_Mean Trend_SD Seasonal_Mean Seasonal_SD Remainder_Mean Remainder_SD
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 30.3 17.2 -0.0000000610 6.88 0.00498 2.39
We use the Australian tourism dataset (from the
tsibbledata package, loaded via fpp3).
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## # ℹ 24,315 more rows
# Basic information about the tourism tsibble
list(
time_period = paste(min(tourism$Quarter), "to", max(tourism$Quarter)),
total_observations = nrow(tourism),
# 76 regions x 4 purposes = 304 distinct series (matches the tsibble key)
n_series = n_distinct(tourism$Region) * n_distinct(tourism$Purpose),
variables = names(tourism)
)## $time_period
## [1] "1998 Q1 to 2017 Q4"
##
## $total_observations
## [1] 24320
##
## $n_series
## [1] 304
##
## $variables
## [1] "Quarter" "Region" "State" "Purpose" "Trips"
## # A tsibble: 10 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
# Summary statistics by region.
# FIX: `tourism` is a tsibble, and a tsibble's summarise() always retains
# the time index. group_by(Region) alone therefore produced one row per
# Region x Quarter (6,080 rows with n_quarters = 4, as the original output
# shows) instead of one row per region. Dropping the tsibble semantics with
# as_tibble() first -- the same pattern this file uses later for
# top_regions -- collapses over time as intended (76 rows, n_quarters = 80).
region_summary <- tourism %>%
  as_tibble() %>%
  group_by(Region) %>%
  summarise(
    avg_trips = mean(Trips),
    total_trips = sum(Trips),
    n_quarters = n(),
    .groups = 'drop'
  )
region_summary## # A tsibble: 6,080 x 5 [1Q]
## # Key: Region [76]
## Region Quarter avg_trips total_trips n_quarters
## <chr> <qtr> <dbl> <dbl> <int>
## 1 Adelaide 1998 Q1 165. 659. 4
## 2 Adelaide 1998 Q2 112. 450. 4
## 3 Adelaide 1998 Q3 148. 593. 4
## 4 Adelaide 1998 Q4 131. 524. 4
## 5 Adelaide 1999 Q1 137. 548. 4
## # ℹ 6,075 more rows
# Summary by purpose.
# FIX: without as_tibble(), tsibble's summarise() keeps the Quarter index,
# yielding one row per Purpose x Quarter (320 rows in the original output)
# rather than one row per Purpose. Convert to a plain tibble first.
purpose_summary <- tourism %>%
  as_tibble() %>%
  group_by(Purpose) %>%
  summarise(
    avg_trips = mean(Trips),
    total_trips = sum(Trips),
    .groups = 'drop'
  )
purpose_summary## # A tsibble: 320 x 4 [1Q]
## # Key: Purpose [4]
## Purpose Quarter avg_trips total_trips
## <chr> <qtr> <dbl> <dbl>
## 1 Business 1998 Q1 47.4 3599.
## 2 Business 1998 Q2 49.0 3724.
## 3 Business 1998 Q3 57.3 4356.
## 4 Business 1998 Q4 49.9 3796.
## 5 Business 1999 Q1 43.9 3335.
## # ℹ 315 more rows
We visualize (1) overall tourism, (2) by purpose, (3) top 4 regions, and (4) seasonal patterns by purpose.
# 1. Overall time series (use tsibble's index_by on the time index).
# index_by() is the tsibble analogue of group_by() for the time index, so
# summarise() collapses all 304 series into one total per quarter.
overall <- tourism %>%
index_by(Quarter) %>%
summarise(total_trips = sum(Trips))
p1 <- ggplot(overall, aes(x = Quarter, y = total_trips)) +
geom_line(color = "blue", linewidth = 1) +
labs(title = "Total Australian Tourism Over Time",
x = "Quarter", y = "Total Trips") +
theme_minimal()
# 2. By purpose: index_by() + group_by() yields one series per Purpose
by_purpose <- tourism %>%
index_by(Quarter) %>%
group_by(Purpose) %>%
summarise(total_trips = sum(Trips), .groups = "drop_last")
p2 <- ggplot(by_purpose, aes(x = Quarter, y = total_trips, color = Purpose)) +
geom_line(linewidth = 0.8) +
labs(title = "Tourism by Purpose",
x = "Quarter", y = "Total Trips") +
theme_minimal() +
theme(legend.position = "bottom")
# 3. Top 4 regions. The ranking is computed on a plain tibble: as_tibble()
# drops the tsibble index so group_by/summarise can collapse over time.
top_regions <- tourism %>%
as_tibble() %>%
group_by(Region) %>%
summarise(total = sum(Trips), .groups = "drop") %>%
arrange(desc(total)) %>%
slice_head(n = 4) %>%
pull(Region)
# Then aggregate by index and group by Region for plotting
by_region <- tourism %>%
filter(Region %in% top_regions) %>%
index_by(Quarter) %>%
group_by(Region) %>%
summarise(total_trips = sum(Trips), .groups = "drop_last")
p3 <- ggplot(by_region, aes(x = Quarter, y = total_trips, color = Region)) +
geom_line(linewidth = 0.8) +
labs(title = "Tourism in Top 4 Regions",
x = "Quarter", y = "Total Trips") +
theme_minimal() +
theme(legend.position = "bottom")
# 4. Seasonal patterns by purpose.
# FIX: convert to a plain tibble first. Grouping a tsibble keeps the
# Quarter index through summarise(), which would leave one row per
# individual quarter rather than one average per Purpose x quarter-of-year,
# and the dodged bar chart below would draw many overlapping bars per group.
seasonal_patterns <- tourism %>%
  as_tibble() %>%
  mutate(Year = year(Quarter),
         Qtr = quarter(Quarter)) %>%
  group_by(Purpose, Qtr) %>%
  summarise(avg_trips = mean(Trips), .groups = "drop")
# One dodged bar per Purpose within each quarter of the year
p4 <- ggplot(seasonal_patterns, aes(x = factor(Qtr), y = avg_trips, fill = Purpose)) +
  geom_col(position = "dodge") +
  labs(title = "Average Quarterly Tourism by Purpose",
       x = "Quarter", y = "Average Trips") +
  theme_minimal() +
  theme(legend.position = "bottom")
grid.arrange(p1, p2, p3, p4, ncol = 2)# 1. Aggregate to yearly data
# Aggregate quarterly trips to yearly data per Region x Purpose.
# FIX: convert to a plain tibble first -- tsibble's summarise() retains the
# Quarter index, which is why the original output showed one row per
# quarter with quarterly_count = 1 instead of true yearly totals
# (quarterly_count should be 4 for complete years).
yearly_data <- tourism %>%
  as_tibble() %>%
  mutate(Year = year(Quarter)) %>%
  group_by(Region, Purpose, Year) %>%
  summarise(
    total_trips = sum(Trips),
    avg_trips = mean(Trips),
    quarterly_count = n(),
    .groups = 'drop'
  )
head(yearly_data, 10)## # A tsibble: 10 x 7 [1Q]
## # Key: Region, Purpose, Year [3]
## Region Purpose Year Quarter total_trips avg_trips quarterly_count
## <chr> <chr> <dbl> <qtr> <dbl> <dbl> <int>
## 1 Adelaide Business 1998 1998 Q1 135. 135. 1
## 2 Adelaide Business 1998 1998 Q2 110. 110. 1
## 3 Adelaide Business 1998 1998 Q3 166. 166. 1
## 4 Adelaide Business 1998 1998 Q4 127. 127. 1
## 5 Adelaide Business 1999 1999 Q1 137. 137. 1
## 6 Adelaide Business 1999 1999 Q2 200. 200. 1
## 7 Adelaide Business 1999 1999 Q3 169. 169. 1
## 8 Adelaide Business 1999 1999 Q4 134. 134. 1
## 9 Adelaide Business 2000 2000 Q1 154. 154. 1
## 10 Adelaide Business 2000 2000 Q2 169. 169. 1
# 2. Aggregate by region and purpose, collapsing over all quarters.
# FIX: as_tibble() drops the tsibble index; without it summarise() keeps
# Quarter and returns per-quarter rows (quarters_count = 1 and
# first_quarter == last_quarter, exactly as the original output shows).
region_purpose_summary <- tourism %>%
  as_tibble() %>%
  group_by(Region, Purpose) %>%
  summarise(
    total_trips = sum(Trips),
    avg_trips = mean(Trips),
    first_quarter = min(Quarter),
    last_quarter = max(Quarter),
    quarters_count = n(),
    .groups = 'drop'
  )
head(region_purpose_summary, 10)## # A tsibble: 10 x 8 [1Q]
## # Key: Region, Purpose [1]
## Region Purpose Quarter total_trips avg_trips first_quarter last_quarter
## <chr> <chr> <qtr> <dbl> <dbl> <qtr> <qtr>
## 1 Adelaide Business 1998 Q1 135. 135. 1998 Q1 1998 Q1
## 2 Adelaide Business 1998 Q2 110. 110. 1998 Q2 1998 Q2
## 3 Adelaide Business 1998 Q3 166. 166. 1998 Q3 1998 Q3
## 4 Adelaide Business 1998 Q4 127. 127. 1998 Q4 1998 Q4
## 5 Adelaide Business 1999 Q1 137. 137. 1999 Q1 1999 Q1
## 6 Adelaide Business 1999 Q2 200. 200. 1999 Q2 1999 Q2
## 7 Adelaide Business 1999 Q3 169. 169. 1999 Q3 1999 Q3
## 8 Adelaide Business 1999 Q4 134. 134. 1999 Q4 1999 Q4
## 9 Adelaide Business 2000 Q1 154. 154. 2000 Q1 2000 Q1
## 10 Adelaide Business 2000 Q2 169. 169. 2000 Q2 2000 Q2
## # ℹ 1 more variable: quarters_count <int>
We focus on one time series: Business trips in Sydney.
# Filter: Business trips in Sydney (a single series from the tourism tsibble)
sydney_business <- tourism %>%
  filter(Region == "Sydney", Purpose == "Business") %>%
  as_tsibble(index = Quarter)
# Convert to a ts object for decomposition.
# FIX: supply start = c(1998, 1) so the ts object keeps its calendar
# alignment (the data run 1998 Q1 - 2017 Q4); without it the decomposition
# plots are labelled 1, 2, 3, ... instead of actual years.
ts_sydney <- ts(sydney_business$Trips, start = c(1998, 1), frequency = 4)
# STL decomposition with a periodic (fixed across years) seasonal component
decomp_sydney <- stl(ts_sydney, s.window = "periodic")
# Plot decomposition
par(mfrow = c(4, 1), mar = c(3, 4, 2, 2))
plot(decomp_sydney, main = "STL Decomposition: Business Trips in Sydney")# Extract components
# Pull each column of stl()'s time.series matrix into a plain tibble,
# aligned with the original quarters.
components_df <- tibble(
Quarter = sydney_business$Quarter,
Observed = sydney_business$Trips,
Trend = as.numeric(decomp_sydney$time.series[, "trend"]),
Seasonal = as.numeric(decomp_sydney$time.series[, "seasonal"]),
Remainder = as.numeric(decomp_sydney$time.series[, "remainder"])
)
# Decomposition statistics: the seasonal mean near zero confirms the
# additive decomposition; Seasonal_Amp is the peak-to-trough swing.
components_df %>%
summarise(
Observed_Mean = mean(Observed),
Trend_Mean = mean(Trend),
Seasonal_Mean = mean(Seasonal),
Seasonal_Amp = max(Seasonal) - min(Seasonal),
Remainder_SD = sd(Remainder)
)## # A tibble: 1 × 5
## Observed_Mean Trend_Mean Seasonal_Mean Seasonal_Amp Remainder_SD
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 602. 602. 0.00000178 132. 63.9
# Visualize components
# Long format: one row per (Quarter, Component) so facet_wrap can stack
# Observed / Trend / Seasonal / Remainder as aligned panels.
components_long <- components_df %>%
pivot_longer(cols = -Quarter, names_to = "Component", values_to = "Value")
p_decomp <- ggplot(components_long, aes(x = Quarter, y = Value)) +
geom_line(color = "blue", linewidth = 0.8) +
facet_wrap(~Component, scales = "free_y", ncol = 1) +
labs(title = "Business Trips in Sydney - Component Analysis", x = "Quarter", y = "Trips") +
theme_minimal()
p_decomp

We create an interactive time series plot using plotly.
# Prepare data for interactive plot (convert to plain tibble to avoid tsibble index constraints)
interactive_data <- tourism %>%
as_tibble() %>% # <-- drop tsibble semantics
group_by(Quarter, Purpose) %>%
summarise(total_trips = sum(Trips), .groups = "drop")
# Create interactive plot: one line per Purpose with a custom hover label.
# <extra></extra> in the hovertemplate suppresses plotly's default trace box.
p_interactive <- plot_ly(
interactive_data,
x = ~Quarter, y = ~total_trips, color = ~Purpose,
type = "scatter", mode = "lines",
hovertemplate = paste(
"Quarter: %{x}<br>",
"Trips: %{y:,}<br>",
"Purpose: %{text}<extra></extra>"
),
text = ~Purpose
) %>%
layout(
title = "Australian Tourism by Purpose (Interactive)",
xaxis = list(title = "Quarter"),
yaxis = list(title = "Total Trips"),
# Show all traces' values for the hovered quarter in one tooltip
hovermode = "x unified"
)
p_interactive

You have learned how to:
- Work with the Date and POSIXct classes
- Parse dates with lubridate (ymd(), dmy(), mdy())
- Extract date components (year(), month(), wday())
- Create regular date sequences with seq()
- Aggregate temporal data with group_by() and summarise()
- Decompose time series with decompose()

Key functions to remember:
- lubridate: ymd(), floor_date(),
year(), month(), wday()
- dplyr: group_by(), summarise(),
mutate()
- ggplot2: ggplot(), geom_line(),
geom_col()
- forecast: decompose(), stl()
- tsibble: as_tsibble() for tidy time series
Below are some helpful online references for deepening your understanding of time series analysis, R time series packages, visualization tools, and forecasting techniques.
This material is part of the training program by The National Centre for Research Methods © NCRM authored by Dr Somnath Chaudhuri (University of Southampton). Content is under a CC BY‑style permissive license and can be freely used for educational purposes with proper attribution.